#############################
# Descriptive Plots #####
#############################
# created by Franziska Quoß
# franziska.quoss@gesis.org

# NOTE(review): removes every object from the global environment before the
# script starts; run this file only in a fresh / disposable R session.
rm(list = ls())
# load libraries
library(rio)
library(tidyverse)
library(tidyr)
library(RColorBrewer)
library(scales)
library(plyr)
library(marginaleffects)
library(ggpubr)
library(grid)
library(agrmt)
library(gt)

# set working directory to the replication folder; every path used below
# ("w4_final.csv", "Figures/...") is relative to it
setwd("C:/Users/quossfa/Dropbox/Env_Voting/code/replication")

# import data ####
# the dimension check stops the script if a different version of the file
# (other number of rows or columns) is loaded
w4 <- import("w4_final.csv")
stopifnot(nrow(w4) == 6268, ncol(w4) == 358)

# Figure 1 ####

# Create a data set with one row for each candidate that was shown in any of
# the cantons.
# The wide columns w4_cand<k>_<attribute> are stacked: <attribute> becomes a
# column of its own (".value") and the "w4_cand<k>_" prefix is stored in the
# column w4_cand. Rows whose value columns are all NA are dropped.
w4_cand <- w4 %>%
  pivot_longer(
    cols = c(
      w4_cand1_id:w4_cand18_leftright,
      w4_cand1_SV_diff_abs:w4_cand18_LR_diff_abs,
      w4_cand1_PID_align:w4_cand18_PID_align
    ),
    names_to = c("w4_cand", ".value"),
    names_pattern = "(w4_cand\\d+_)(.+)",
    values_drop_na = TRUE
  )

# the distance / alignment columns are not needed for the candidate data set
w4_cand$SV_diff_abs <- NULL
w4_cand$LR_diff_abs <- NULL
w4_cand$PID_align <- NULL

# keep only candidate variables (the contiguous columns id ... leftright)
first_cand_col <- which(colnames(w4_cand) == "id")
last_cand_col <- which(colnames(w4_cand) == "leftright")
w4_cand <- w4_cand[, first_cand_col:last_cand_col]

# keep only unique candidates: the first row of every candidate id.
# (Indexing rows with unique(w4_cand$id) would treat the id *values* as row
# numbers and return arbitrary rows instead of de-duplicating.)
w4_cand <- w4_cand[!duplicated(w4_cand$id), ]
# author's note: 1118 observations remaining
rm(first_cand_col, last_cand_col)

# Environmental Score versus LiRe Score respondents #

# Party-level summary of the candidates' positions: mean plus 10th and 90th
# percentile of the environmental score and of the left-right score.
# na.rm = TRUE is used for the quantiles as well, so they treat missing
# scores the same way as the means (quantile() errors on NA otherwise).
w4_cand_party <- w4_cand %>%
  dplyr::group_by(partyid) %>%
  dplyr::summarise(
    mean_env = mean(envscore, na.rm = TRUE),
    low_env = quantile(envscore, 0.1, na.rm = TRUE),
    high_env = quantile(envscore, 0.9, na.rm = TRUE),
    mean_lr = mean(leftright, na.rm = TRUE),
    low_lr = quantile(leftright, 0.1, na.rm = TRUE),
    high_lr = quantile(leftright, 0.9, na.rm = TRUE)
  )

# Party labels for partyid 1-10 (position in the vector = partyid);
# any other partyid keeps party_name = NA.
party_labels <- c("CVP", "FDP", "SVP", "SP", "Greens",
                  "glp", "BDP", "CSP", "Lega", "PdA")
w4_cand_party$party_name <-
  party_labels[match(w4_cand_party$partyid, seq_along(party_labels))]
rm(party_labels)

# color scheme
# nine-colour yellow-green palette; the density layer below uses bins = 7
heat_colours <- brewer.pal(9, "YlGn")
  
# Figure 1: filled 2D density of respondents (w4_q22_rescaled vs.
# env_score_ego, taken from w4) with the party means of the first nine rows
# of w4_cand_party drawn on top.
ggplot(data = w4_cand_party[1:9,], aes(mean_lr, mean_env)) +  
  geom_density_2d_filled(data = w4, 
                         aes(w4_q22_rescaled, env_score_ego),
                          bins=7) +
  scale_fill_manual(values = c(heat_colours), aesthetics = c("fill")) +
    theme(legend.position="none", aspect.ratio=1,
        panel.background = element_rect(fill = "white",
                                        colour = "white",
                                        size = 0.5, linetype = "solid"),
        panel.grid.major = element_line(size = 0.5, linetype = 'solid',
                                        colour = "white"), 
        panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
                                        colour = "white")) +
  labs(x = "Left-right score\n(0 = left, 10 = right)",
       y = "Environmental score\n(0 = no more env pol, 10 = much more env policies)",
       caption = "points = party mean scores based on all presented candidates,
       crosses represent 80% of Candidates") + 
  # party means, with a vertical range (10th-90th percentile of the
  # environmental score) and a horizontal range (same for left-right)
  geom_point() +
  geom_pointrange(aes(ymin = low_env, ymax = high_env)) +
  geom_pointrange(aes(xmin = low_lr, xmax = high_lr)) +
  # Party labels. Rows are addressed by position in w4_cand_party, so the
  # nudges below depend on the row order of that data set:
  # rows 1-4, 6, 8, 9 share one offset, rows 5 and 7 get their own.
  geom_text(data = w4_cand_party[c(1:4,6,8:9),], label = w4_cand_party$party_name[c(1:4,6,8:9)],
            nudge_x = 0.5, nudge_y = -0.2, colour = "black") +
  geom_text(data = w4_cand_party[5,], label = w4_cand_party$party_name[5],
            nudge_x = 0.9, nudge_y = -0.2, colour = "black") +
  geom_text(data = w4_cand_party[7,],label = w4_cand_party$party_name[7],
            nudge_x = -0.5, nudge_y = 0.2, colour = "black")

# no plot argument is given, so ggsave() writes the last plot displayed
ggsave("Figures/Figure1.eps",
        width =5, height = 5, dpi = 400)

rm(w4_cand_party)

# correlation for respondents: -0.55
# (computed on the observations where both scores are non-missing)
cor(w4$w4_q22_rescaled, w4$env_score_ego, use = "pairwise.complete.obs")

# Figure A.1 ####
# no of votes per party
# Long format: one row per respondent and per column whose name contains
# "w4_q33"; the column name goes to `position`, its value to `votes`.
w4_long <- w4 %>%
  pivot_longer(cols = contains("w4_q33"),
               names_to = "position",
               values_to = "votes",
               values_drop_na = TRUE)

# keep only the one with votes
w4_long <- w4_long[w4_long$votes > 0, ]
# 15733 votes overall

# fill in elected party:
# a vote at position w4_q33x<k> takes the party name stored in
# w4_cand<k>_partyname of the same row (k = 1, ..., 18)
w4_long$elected_party <- NA
for (k in seq_len(18)) {
  at_position <- w4_long$position == paste0("w4_q33x", k)
  w4_long$elected_party[at_position] <-
    w4_long[[paste0("w4_cand", k, "_partyname")]][at_position]
}
rm(k, at_position)

table(w4_long$elected_party, exclude = NULL)

# Total number of votes per party; the party factor is ordered by descending
# vote total so the bars are drawn from largest to smallest.
# dplyr:: / plyr:: are spelled out because plyr is attached after the
# tidyverse and therefore masks dplyr verbs such as mutate() and summarise().
w4_par <- w4_long %>%
  dplyr::group_by(elected_party) %>%
  dplyr::summarise(sum_votes = sum(votes)) %>%
  dplyr::mutate(
    elected_party = fct_reorder(elected_party, sum_votes, .desc = TRUE)
  )

# shorten two party names for the axis
w4_par$elected_party <- plyr::revalue(w4_par$elected_party,
                                      c("Grünliberale" = "glp",
                                        "Grüne" = "GPS"))

ggplot(data = w4_par, aes(elected_party, sum_votes)) +
  geom_col(fill = "#385C9B") +
  labs(caption = "",
       x = "",
       y = "votes") +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dashed"),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10))
# no plot argument is given, so ggsave() writes the last plot displayed
ggsave("Figures/FigureA1.eps",
       width = 10, height = 6)

# Figure A.2 ####
# no of votes for PID party

# for all respondents whose PID is one of the parties included in the experiment
table(w4$w4_PID_align_sum, exclude = NULL)

# Bar chart of w4_PID_align_sum; bar heights are shares of all counted
# observations. after_stat() replaces the deprecated ..count.. notation and
# computes the same quantity.
ggplot(data = w4, aes(w4_PID_align_sum)) +
  geom_bar(fill = "#385C9B", aes(y = after_stat(count / sum(count)))) +
  scale_y_continuous(labels = label_percent(accuracy = 0.1)) +
  labs(caption = "(includes all respondents with a PID which was included in the experiment)",
       x = "Number of votes to party that aligns with PID",
       y = "Percent") +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dashed"),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10))
# no plot argument is given, so ggsave() writes the last plot displayed
ggsave("Figures/FigureA2.eps",
       width = 10, height = 6)

# Figure A.3 ####
# Bivariate regression models predictions for Candidates #

# based on Smartvote data that is available via smartvote team
# www.smartvote.ch
cand <- import("do_not_upload/candidates_an.dta")

# some cleaning: -9 is recoded to missing, then both scores are rescaled
cand$cleavage_6[cand$cleavage_6 == -9] <- NA
cand$smartmap_x[cand$smartmap_x == -9] <- NA
cand$smartmap_x <- cand$smartmap_x * 10 # also code 0 - 10
cand$cleavage_6 <- cand$cleavage_6 / 10 # also code 0 - 10
cand <- cand[cand$candidate_NR == 1, ] # only NR candidates
cand$age[cand$age == -9] <- NA

# binary age (missing age stays missing)
cand$age_dich <- factor(ifelse(cand$age <= 45, "17-45", "above 45"))

# gender and rural category: the sorted codes are relabelled in order
cand$gender <- factor(cand$gender, labels = c("Female", "Male"))
cand$ruralcat <- factor(cand$ruralcat,
                        labels = c("Urban", "Peri-Urban", "Rural"))

# pid
cand$partycat <- as.factor(cand$party_REC7)

# Each predictor gets a bivariate model and the same model with party
# (partycat) added; the second one is obtained by updating the first.

## environment
# age
env_age_dich_cand <- lm(cleavage_6 ~ age_dich, data = cand)
env_age_dich_pid_cand <- update(env_age_dich_cand, . ~ . + partycat)

# gender
env_sex_cand <- lm(cleavage_6 ~ gender, data = cand)
env_sex_pid_cand <- update(env_sex_cand, . ~ . + partycat)

# rural-urban-semi
env_rur_cand <- lm(cleavage_6 ~ ruralcat, data = cand)
env_rur_pid_cand <- update(env_rur_cand, . ~ . + partycat)

## left-right
# age
lr_age_dich_cand <- lm(smartmap_x ~ age_dich, data = cand)
lr_age_dich_pid_cand <- update(lr_age_dich_cand, . ~ . + partycat)

# gender
lr_sex_cand <- lm(smartmap_x ~ gender, data = cand)
lr_sex_pid_cand <- update(lr_sex_cand, . ~ . + partycat)

# rural-urban-semi
lr_rur_cand <- lm(smartmap_x ~ ruralcat, data = cand)
lr_rur_pid_cand <- update(lr_rur_cand, . ~ . + partycat)


# Environment #

# Stack the marginal means of a bivariate model ("without" party control)
# and of the same model with partycat added ("with").
# The five columns needed for plotting are selected by NAME, so the result
# does not depend on the column order of the installed marginaleffects
# version. Only the first 2 * n_levels rows are kept: the n_levels rows of
# the bivariate model plus the first n_levels rows of the party model
# (assumes the focal predictor's rows come first, as it is the first term
# in the model formula).
stack_marginal_means <- function(model_plain, model_pid, n_levels) {
  keep <- c("value", "estimate", "conf.low", "conf.high", "pid")
  mm_plain <- marginalmeans(model_plain)
  mm_plain$pid <- "without"
  mm_pid <- marginalmeans(model_pid)
  mm_pid$pid <- "with"
  stacked <- rbind(mm_plain[, keep], mm_pid[, keep])
  stacked[seq_len(2 * n_levels), ]
}

# One panel: point estimates with confidence intervals per category,
# dodged and coloured by pid ("with" / "without").
# first_panel = TRUE keeps the legend and the y-axis text; the other panels
# drop both so the panels can be placed side by side.
marginal_means_panel <- function(mm, y_label, y_limits, first_panel) {
  panel <- ggplot(mm, aes(value, estimate, colour = pid)) +
    geom_point(aes(colour = pid, shape = pid),
               position = position_dodge(0.2)) +
    geom_pointrange(aes(ymin = conf.low, ymax = conf.high,
                        colour = pid, shape = pid),
                    position = position_dodge(0.2)) +
    scale_color_manual("pid", values = c("#333333", "#E69F00"),
                       labels = c("with", "without")) +
    labs(x = "", y = y_label) +
    ylim(y_limits[1], y_limits[2]) +
    theme(plot.margin = grid::unit(c(0, -1, 0, 0), "mm"),
          panel.background = element_rect(fill = "transparent"),
          panel.grid.major.y = element_line(size = 0.5, linetype = "dashed",
                                            colour = "grey"))
  if (first_panel) {
    panel + theme(legend.position = c(0.15, 0.85))
  } else {
    panel + theme(legend.position = "none",
                  axis.text.y = element_blank())
  }
}

# marginal means: age (2 categories), sex (2), rural-urban (3)
mm_env_age_plot_cand <- stack_marginal_means(env_age_dich_cand,
                                             env_age_dich_pid_cand,
                                             n_levels = 2)
mm_env_sex_plot_cand <- stack_marginal_means(env_sex_cand,
                                             env_sex_pid_cand,
                                             n_levels = 2)
mm_env_rur_plot_cand <- stack_marginal_means(env_rur_cand,
                                             env_rur_pid_cand,
                                             n_levels = 3)

plot1 <- marginal_means_panel(mm_env_age_plot_cand, "Environmental Score",
                              c(5.5, 8), first_panel = TRUE)
plot2 <- marginal_means_panel(mm_env_sex_plot_cand, "",
                              c(5.5, 8), first_panel = FALSE)
plot3 <- marginal_means_panel(mm_env_rur_plot_cand, "",
                              c(5.5, 8), first_panel = FALSE)

# the NULL panels with negative width pull the three plots closer together
ggarrange(plot1, NULL, plot2, NULL, plot3, nrow = 1,
          widths = c(1, -0.10, 1, -0.10, 1))
# two vertical separator lines drawn on top of the arranged figure
x <- c(0.335, 0.335, 0.335, 0.655, 0.655, 0.655)
y <- c(0.05, 0.5, 0.95, 0.05, 0.5, 0.95)
id <- c(1, 1, 1, 2, 2, 2)
grid.polygon(x, y, id)

# ggsave("Figures/FigureA3a.eps",
#        width = 8, height = 4)
# doesn't save the grid.polygon (save manually via screenshot)
rm(plot1, plot2, plot3)


# Left-Right #

# Stack the marginal means of a bivariate model ("without" party control)
# and of the same model with partycat added ("with").
# The five columns needed for plotting are selected by NAME, so the result
# does not depend on the column order of the installed marginaleffects
# version. Only the first 2 * n_levels rows are kept: the n_levels rows of
# the bivariate model plus the first n_levels rows of the party model
# (assumes the focal predictor's rows come first, as it is the first term
# in the model formula).
stack_marginal_means <- function(model_plain, model_pid, n_levels) {
  keep <- c("value", "estimate", "conf.low", "conf.high", "pid")
  mm_plain <- marginalmeans(model_plain)
  mm_plain$pid <- "without"
  mm_pid <- marginalmeans(model_pid)
  mm_pid$pid <- "with"
  stacked <- rbind(mm_plain[, keep], mm_pid[, keep])
  stacked[seq_len(2 * n_levels), ]
}

# One panel: point estimates with confidence intervals per category,
# dodged and coloured by pid ("with" / "without").
# first_panel = TRUE keeps the legend and the y-axis text; the other panels
# drop both so the panels can be placed side by side.
marginal_means_panel <- function(mm, y_label, y_limits, first_panel) {
  panel <- ggplot(mm, aes(value, estimate, colour = pid)) +
    geom_point(aes(colour = pid, shape = pid),
               position = position_dodge(0.2)) +
    geom_pointrange(aes(ymin = conf.low, ymax = conf.high,
                        colour = pid, shape = pid),
                    position = position_dodge(0.2)) +
    scale_color_manual("pid", values = c("#333333", "#E69F00"),
                       labels = c("with", "without")) +
    labs(x = "", y = y_label) +
    ylim(y_limits[1], y_limits[2]) +
    theme(plot.margin = grid::unit(c(0, -1, 0, 0), "mm"),
          panel.background = element_rect(fill = "transparent"),
          panel.grid.major.y = element_line(size = 0.5, linetype = "dashed",
                                            colour = "grey"))
  if (first_panel) {
    panel + theme(legend.position = c(0.15, 0.85))
  } else {
    panel + theme(legend.position = "none",
                  axis.text.y = element_blank())
  }
}

# marginal means: age (2 categories), sex (2), rural-urban (3)
mm_lr_age_plot_cand <- stack_marginal_means(lr_age_dich_cand,
                                            lr_age_dich_pid_cand,
                                            n_levels = 2)
mm_lr_sex_plot_cand <- stack_marginal_means(lr_sex_cand,
                                            lr_sex_pid_cand,
                                            n_levels = 2)
mm_lr_rur_plot_cand <- stack_marginal_means(lr_rur_cand,
                                            lr_rur_pid_cand,
                                            n_levels = 3)

plot1 <- marginal_means_panel(mm_lr_age_plot_cand, "Left-Right Score",
                              c(2, 4), first_panel = TRUE)
plot2 <- marginal_means_panel(mm_lr_sex_plot_cand, "",
                              c(2, 4), first_panel = FALSE)
plot3 <- marginal_means_panel(mm_lr_rur_plot_cand, "",
                              c(2, 4), first_panel = FALSE)

# the NULL panels with negative width pull the three plots closer together
ggarrange(plot1, NULL, plot2, NULL, plot3, nrow = 1,
          widths = c(1, -0.10, 1, -0.10, 1))
# two vertical separator lines drawn on top of the arranged figure
x <- c(0.335, 0.335, 0.335, 0.655, 0.655, 0.655)
y <- c(0.05, 0.5, 0.95, 0.05, 0.5, 0.95)
id <- c(1, 1, 1, 2, 2, 2)
grid.polygon(x, y, id)

ggsave("Figures/FigureA3b.eps",
       width = 8, height = 4)

# ggsave("plots/pred_values_lr.png",
#        width = 8, height = 4, dpi = 400)
# doesn't save the grid.polygon (save manually)
rm(plot1, plot2, plot3)


# Figure A.4 ####
# plot diff abs min
# Density of SV_diff_abs_min; the y value is the computed `count` divided by
# its sum over the evaluation grid. after_stat() replaces the deprecated
# ..count.. notation and computes the same quantity.
# (commented-out x label in the original: "minimal absolute difference
# between respondent and closest candidate - environmental score")
ggplot(data = w4, aes(SV_diff_abs_min)) +
  geom_density(fill = "#385C9B", aes(y = after_stat(count / sum(count)))) +
  scale_y_continuous(labels = label_percent(accuracy = 0.1)) +
  labs(x = "",
       y = "Percent") +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dashed"),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10))

# no plot argument is given, so ggsave() writes the last plot displayed
ggsave("Figures/FigureA4.eps",
       width = 10, height = 6)
summary(w4$SV_diff_abs_min)


# Figure A.5 ####
# plot diff abs min
# Density of LR_diff_abs_min; the y value is the computed `count` divided by
# its sum over the evaluation grid. after_stat() replaces the deprecated
# ..count.. notation and computes the same quantity.
# (commented-out x label in the original: "minimal absolute difference
# between respondent and closest candidate - left-right score")
ggplot(data = w4, aes(LR_diff_abs_min)) +
  geom_density(fill = "#385C9B", aes(y = after_stat(count / sum(count)))) +
  scale_y_continuous(labels = label_percent(accuracy = 0.1)) +
  labs(x = "",
       y = "Percent") +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dashed"),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10))
# no plot argument is given, so ggsave() writes the last plot displayed
ggsave("Figures/FigureA5.eps",
       width = 10, height = 6)

summary(w4$LR_diff_abs_min)


# Table A.2 ####
# sample representativeness #
# this part is not reproducible at home due to data protection #

w4_sample <- import("../../data/upanel_w4.dta")

# only those who were invited:
# cantons that did not receive the experiment (AI, AR, GL, NW, OW, UR) are
# dropped by keeping the remaining twenty cantons
experiment_cantons <- c("AG", "BE", "BL", "BS",
                        "FR", "GE", "GR", "JU",
                        "LU", "NE", "SG", "SH",
                        "SO", "SZ", "TG", "TI",
                        "VD", "VS", "ZG", "ZH")
w4_sample <- w4_sample[w4_sample$srph_canton %in% experiment_cantons, ]
rm(experiment_cantons)

# only Swiss citizens eligible to vote
# some differences between self-reported eligibility to vote and official data:
table(w4_sample$srph_nationality == 8100, w4_sample$w4_q24 == 3)

# register data only gives one citizenship, but multiple citizenships possible
# but not EVERYONE has double citizenship - exclude people with non-Swiss, small error
w4_sample <- w4_sample[w4_sample$srph_nationality == 8100, ]

# 15933 respondents were contacted and asked to fill out the experiment

# out of those, who replied in the experiment?
# sum of the 18 vote columns w4_q33x1 ... w4_q33x18, added left to right
# (a missing value in any of them makes the sum missing)
w4_sample$sum_exp <- Reduce(`+`, w4_sample[paste0("w4_q33x", 1:18)])
table(w4_sample$sum_exp, exclude = NULL)

# everyone with at least 1.
# 1 = positive sum, 0 = sum of zero or below, NA = sum missing
w4_sample$env_vote <- as.numeric(w4_sample$sum_exp > 0)
table(w4_sample$env_vote)

# 6888 filled out something (some not correctly, were removed later before analysis)

# # correctly filled out (1-3 votes):
# w4_sample$env_vote[w4_sample$sum_exp%in%c(1, 2, 3)] <- 1
# w4_sample$env_vote[w4_sample$sum_exp< 1] <- 0
# w4_sample$env_vote[w4_sample$sum_exp> 3] <- 0
# table(w4_sample$env_vote)
# # 6245

# Now Compare

# Summary Descriptives of Sampling frame #
library(stringr)

table(w4_sample$srph_gender, w4_sample$w4_q2)
# female dummy from the register gender: 2 -> 1, 1 -> 0, anything else NA
w4_sample$srph_Female <- NA_real_
w4_sample$srph_Female[w4_sample$srph_gender == 1] <- 0
w4_sample$srph_Female[w4_sample$srph_gender == 2] <- 1

table(w4_sample$srph_bday)
# birth year = first four characters of the birthday
w4_sample$srph_Year <- as.numeric(str_sub(w4_sample$srph_bday, 1, 4))

table(w4_sample$srph_bigreg)
# one 0/1 dummy column per region code of srph_bigreg
region_codes <- c("Lake Geneva" = 1,
                  "Espace Mittelland" = 2,
                  "North-West" = 3,
                  "Zurich" = 4,
                  "East" = 5,
                  "Central" = 6,
                  "Ticino" = 7)
for (region in names(region_codes)) {
  w4_sample[[region]] <- ifelse(
    w4_sample$srph_bigreg == region_codes[[region]], 1, 0
  )
}

table(w4_sample$srph_lang)
# one 0/1 dummy column per language code of srph_lang
language_codes <- c(German = "de", French = "fr", Italian = "it")
for (language in names(language_codes)) {
  w4_sample[[language]] <- ifelse(
    w4_sample$srph_lang == language_codes[[language]], 1, 0
  )
}

# variables compared between sampling frame and respondents, and the row
# labels used for them (variable name without the "srph_" prefix)
ind_var <- c("srph_Female", "srph_Year",
             names(region_codes), names(language_codes))
ind_var_labels <- str_remove(ind_var, "srph_")
rm(region_codes, region, language_codes, language)


# Summary statistics (min, median, mean, max) of every variable in ind_var
# for the whole sampling frame.
# Computed per variable without creating global vectors called min / median /
# mean / max (which would shadow the base functions of the same name).
# All four statistics ignore missing values.
frame_stats <- lapply(ind_var, function(var_name) {
  values <- w4_sample[[var_name]]
  c(min = min(values, na.rm = TRUE),
    median = median(values, na.rm = TRUE),
    mean = mean(values, na.rm = TRUE),
    max = max(values, na.rm = TRUE))
})

# one row per variable: running number (V1), label, then the statistics
variance_sum <- data.frame(
  V1 = as.character(seq_along(ind_var_labels)),
  ind_var_labels = ind_var_labels,
  do.call(rbind, frame_stats)
)
variance_sum$mean <- round(variance_sum$mean, 2)
rm(frame_stats)

# comparison table starts with the sampling-frame means
compare <- variance_sum[, c("V1", "ind_var_labels", "mean")]
compare <- compare %>% dplyr::rename("Sample Mean" = mean)

# Summary Descriptives of Respondents #

# respondents = rows of the sampling frame with env_vote equal to 1
resp <- w4_sample[w4_sample$env_vote == 1, ]

# Summary statistics (min, median, mean, max) of every variable in ind_var
# for the respondents.
# Computed per variable without creating global vectors called min / median /
# mean / max (which would shadow the base functions of the same name).
# All four statistics ignore missing values.
resp_stats <- lapply(ind_var, function(var_name) {
  values <- resp[[var_name]]
  c(min = min(values, na.rm = TRUE),
    median = median(values, na.rm = TRUE),
    mean = mean(values, na.rm = TRUE),
    max = max(values, na.rm = TRUE))
})

# one row per variable: running number (V1), label, then the statistics
variance_sum <- data.frame(
  V1 = as.character(seq_along(ind_var_labels)),
  ind_var_labels = ind_var_labels,
  do.call(rbind, resp_stats)
)
variance_sum$mean <- round(variance_sum$mean, 2)
rm(resp_stats)

# add the respondent means next to the sampling-frame means and assign each
# row to a table group (2 single rows, 7 regions, 3 languages)
compare[["Respondent Mean"]] <- variance_sum$mean
compare$region <- c("Gender", "Birth year",
                    rep("Region", 7), rep("Language", 3))

library(gt)
# Table A.2: sampling-frame vs. respondent means.
# The first column of `compare` (running number) is dropped; the variable
# label becomes the row name and `region` defines the row groups.
table_ind_var <- gt(compare[, -1],
                    rowname_col = "ind_var_labels",
                    groupname_col = "region")
# format decimals: rows matching "Year" without decimals or separators
table_ind_var <- fmt_number(table_ind_var,
                            rows = contains("Year"),
                            decimals = 0,
                            use_seps = FALSE)
# header
table_ind_var <- tab_header(
  table_ind_var,
  title = md("**Summary Statistics of Sample vs. Respondents**")
)
# left-align all columns and the header
table_ind_var <- cols_align(table_ind_var,
                            align = "left",
                            columns = everything())
table_ind_var <- opt_align_table_header(table_ind_var, align = "left")
# set column widths
table_ind_var <- cols_width(table_ind_var, everything() ~ px(150))

gtsave(table_ind_var, "Figures/TableA2.pdf",
       expand = 8)

# Table A.8 ####
# voters' placements of parties #
# SELECTS data available from FORS.
d <- readstata13::read.dta13(
  "../../data/selects/data/1179_Selects2019_PES_Data_v1.1.0.dta",
  convert.factors = FALSE
)

dp <- readstata13::read.dta13(
  "../../data/selects/data/1184_Selects2019_Panel_Data_v5.0.dta",
  convert.factors = FALSE
)

### party placement analysis

## cross-section

# keep only online people - imode == 1
d <- d[d$imode == 1, ]

# respondent * left-right-rating: all columns starting with "f15130".
# The party names below are assigned to these columns in their file order.
dr <- dplyr::select(d, starts_with("f15130"))
lr_parties <- c("FDP", "CVP", "SP", "SVP", "GPS", "GLP", "BDP", "Lega", "MCG")

# share non-missing, per column
# (vapply over the columns instead of apply(), which would first coerce the
# data frame to a matrix)
nonmisslr <- vapply(dr, function(x) mean(!is.na(x)), numeric(1))
names(nonmisslr) <- lr_parties

# van der Eijk agreement score, per column
agrlr <- vapply(dr, function(x) agreement(agrmt::collapse(x)), numeric(1))
names(agrlr) <- lr_parties
rm(lr_parties)

## panel

# policy positions of parties for environment
# keep only respondents who replied in wave 3 (non-missing W3_intstart)
selects <- dp[!is.na(dp$W3_intstart), ]

# respondents only answered two of five policy items (env one of them)
# env = 1 if either of the two item columns equals 4, 0 if both are
# observed and differ from 4, NA otherwise
first_is_env <- selects$W2_f15340ri1 == 4
second_is_env <- selects$W2_f15340ri2 == 4
selects$env <- NA
selects$env[first_is_env | second_is_env] <- 1
selects$env[!first_is_env & !second_is_env] <- 0
rm(first_is_env, second_is_env)
table(selects$env, exclude = NULL)

# x1 / x2 hold the two positions, y1 / y2 the corresponding party codes
subset_select <- dplyr::select(selects,
                               id,
                               x1 = W3_f15360d,
                               y1 = W3_f15360rp1,
                               x2 = W3_f15370d,
                               y2 = W3_f15370rp2,
                               env)
# long data, two obs per person: the letter of x1 / y1 / x2 / y2 becomes the
# value column, the digit becomes `set`
selects_long <- pivot_longer(subset_select,
                             cols = !c(id, env),
                             names_pattern = "(.)(.)",
                             names_to = c(".value", "set"))
selects_long <- dplyr::rename(selects_long, position = x, party = y)
rm(selects, subset_select)

table(selects_long$position, selects_long$env, exclude = NULL)
# some respondents did not see env as issue in w2 but still responded - coding error?

# keep only those with env == 1
selects_long <- dplyr::filter(selects_long, env == 1)
table(selects_long$position, selects_long$env, exclude = NULL)

table(selects_long$position, exclude = NULL)
# 1 strongly in favour, 2 rather in favour, 3 neither, 4 rather against,
# 5 strongly against, 8 don't know, -99 no answer
selects_long$position[selects_long$position %in% c(-99, 8)] <- NA

## all respondents
# Environmental placements for party codes 1-5; the names are assigned in
# code order (1 = FDP, 2 = CVP, 3 = SP, 4 = SVP, 5 = GPS).
env_parties <- c("FDP", "CVP", "SP", "SVP", "GPS")

# Positions given to each party. %in% is used instead of ==, so rows with a
# missing party code are left out; with == they would enter every party's
# vector as NA and lower each party's non-missing share.
positions_by_party <- lapply(seq_along(env_parties), function(party_code) {
  selects_long$position[selects_long$party %in% party_code]
})

# share of non-missing placements per party
nonmissenv <- vapply(positions_by_party,
                     function(pos) mean(!is.na(pos)),
                     numeric(1))
names(nonmissenv) <- env_parties

# van der Eijk agreement score per party
agrenv <- vapply(positions_by_party,
                 function(pos) agreement(agrmt::collapse(pos)),
                 numeric(1))
names(agrenv) <- env_parties
rm(env_parties, positions_by_party)

# export as table
# For each score vector: the first five parties plus their mean (named
# "mean"), rounded to two decimals.
first_five_with_mean <- function(scores) {
  round(c(scores[1:5], mean = mean(scores[1:5])), 2)
}

nonmisslr_compl <- first_five_with_mean(nonmisslr)
nonmissenv_compl <- first_five_with_mean(nonmissenv)
nonmiss_comp <- rbind(nonmissenv_compl, nonmisslr_compl)

agrlr_compl <- first_five_with_mean(agrlr)
agrenv_compl <- first_five_with_mean(agrenv)
agr_comp <- rbind(agrenv_compl, agrlr_compl)
rm(first_five_with_mean)

# four rows (environment before left-right within each measure); the row
# names created by rbind() are replaced by default row numbers
comp <- as.data.frame(rbind(nonmiss_comp, agr_comp), row.names = FALSE)
comp$analysis <- c("Non-missings environment",
                   "Non-missings left-right",
                   "Agreement environment",
                   "Agreement left-right")

# column order: analysis label, mean, then the five parties
comp <- comp[, c(7, 6, 1:5)]

# Table A.8 as a gt table
table_comp <- gt(comp)
# header
table_comp <- tab_header(
  table_comp,
  title = md("**Non-missings and agreement for environmental and left-right party placement**")
)
# left-align all columns and the header
table_comp <- cols_align(table_comp, align = "left", columns = everything())
table_comp <- opt_align_table_header(table_comp, align = "left")
# set the width of the analysis column
table_comp <- cols_width(table_comp, contains("analysis") ~ px(150))

# save as PNG first, then as PDF
for (out_file in c("Figures/TableA8.png", "Figures/TableA8.pdf")) {
  gtsave(table_comp, out_file,
         expand = 9)
}
rm(out_file)
